********************************************************************************
*	PROJECT: Childhood confidence and long-term outcomes (PSID)
*	PURPOSE: Clean PSID files (household and individual)
*	PUBLISHED: August 2022
*	CONTACT: Hannah Ruebeck, hruebeck@mit.edu
********************************************************************************
* Session setup: start from an empty session, turn off output paging, set the
* maximum number of variables to 10000, and enable the pause command.
clear all
set more off
set maxvar 10000
pause on

* Root folder of the replication materials; edit to match the local machine.
local path "/Users/XXXXX/Downloads/replication materials"

* Folders derived from the root: raw inputs, the cross-year individual
* extract inside raw, and the folder that receives the cleaned outputs.
local RAW "`path'/raw"
local INDIV "`RAW'/Cross_year_indiv"
local CLEAN "`path'/clean"

/*
Description: This code pulls information from the PSID family files and the PSID cross-year individual 
file.  The output is two files called "cleaned_psid_indiv.dta" and "cleaned_psid_hh.dta", both
stored in the "clean" folder. The individual dataset is a long version of the cross-year individual file 
with each variable we need included. The household dataset is also a long dataset that appends together the 
household files in each year and harmonizes variable names across them. Variable values are also harmonized
across years.  
*/

* Make the CPI dataset from https://www.minneapolisfed.org/about-us/monetary-policy/inflation-calculator/consumer-price-index-1913-
* Dividing a nominal amount by cpi/100 puts it into 1982-1984 dollars. To put it into 2016 dollars, divide by cpi/100 and then multiply by 240.0/100 (the 2016 CPI divided by 100).

* CPI lookup table: one row per prior year (pryr) with its CPI value, stored
* in a tempfile so it can be merged onto the family data later in this file.
tempfile cpi

clear
input float pryr float cpi
1996 156.9
1998 163.0
2000 172.2
2002 179.9
2004 188.9
2006 201.6
2008 215.3
2010 218.1
2012 229.6
2014 236.7
2016 240.0
2018 251.1
end

save `cpi', replace

********************************************************************************
// PROGRAMS TO HELP WITH CLEANING
// is_kth_char_numeric, str(string) k(int)
//   Returns r(num) = 1 when the character at position k of str is a digit
//   (0 through 9) and r(num) = 0 otherwise, including when k is smaller than 1
//   or larger than the length of str.
//   The character at position k is inspected directly. The previous version
//   compared k with the position of the FIRST occurrence of each digit, so a
//   digit at position k went undetected whenever the same digit also appeared
//   earlier in the string, and the digit 0 was never tested.
program is_kth_char_numeric, rclass
    syntax, str(string) k(int)
    // all three conditions are evaluated; the length and k guards keep an
    // empty substring or a from-the-end position from counting as a digit
    local r = (`k' >= 1 & length("`str'") >= `k' & ///
	       strpos("0123456789", substr("`str'", `k', 1)) > 0)
    return local num = `r'
end
	     
********************************************************************************
// BRING IN INDIVIDUAL CROSS-YEAR DATA

* Read the cross-year individual extract and then its formats file.
do "`INDIV'/J301741.do"
do "`INDIV'/J301741_formats.do"

* Person id: ER30001 zero-padded to 4 digits followed by ER30002 zero-padded to 3.
gen uid = string(ER30001, "%04.0f") + string(ER30002, "%03.0f")

* Label fragments identifying the wave variables to retain. They are joined
* into a single alternation pattern; none contains a regex metacharacter, so
* a match is the same as the fragment occurring anywhere in the label.
local wanted "INTERVIEW NUMBER|MONTH INDIVIDUAL BORN|YEAR INDIVIDUAL BORN"
local wanted "`wanted'|RELATION TO |MARITAL PAIRS INDICATOR"
local wanted "`wanted'|YEARS COMPLETED EDUCATION|H1 HEALTH STATUS"
local wanted "`wanted'|EMPLOYMENT STATUS|STATE WHERE BORN"

* Scan the wave variables and collect every one whose label matches.
local keeplist ""
foreach v of varlist ER33301-ER34864 {
    local vlab : variable label `v'
    if regexm("`vlab'", "`wanted'") local keeplist "`keeplist' `v'"
}
keep ER30001 ER30002 uid `keeplist'
    
    * Rename every kept wave variable to a name built from its label (blanks
    * removed, lower case) whose last two characters are the wave suffix, and
    * collect the distinct name stubs (name minus those two characters) for
    * the reshape below. The original ER name is kept as the variable label.
    local stublist ""
    foreach var of varlist ER33301-ER34752 {
        local label: variable label `var'
        local newname = lower(subinstr("`label'", " ", "", .))
        if strpos("`newname'", "interviewnumber")>0 {
            * the first four characters are taken as the year; its last two
            * digits are moved to the end of the name
            local year = substr("`newname'", 1, 4)
            local newname = substr("`newname'", 5, .) + substr("`year'", 3, 2)
        }
        if strpos("`newname'", "statewhereborn")>0 {
            * keep the last 16 characters and prefix them with fips
            local newname = "fips" + substr("`newname'", -16, .)
        }
        if strpos("`newname'", "relationto")>0 {
            * all relation-to variables share the stub reltohead
            local yr = substr("`newname'", -2, .)
            local newname "reltohead`yr'"
        }
        rename `var' `newname'
        label var `newname' `var'

        * Add the stub only if it is not already in the list. A macro-list
        * union compares whole words; the previous substring test would have
        * skipped any stub that happens to be contained in an earlier one.
        local stub = substr("`newname'", 1, length("`newname'")-2)
        local stublist : list stublist | stub
    }

di "`stublist'"
* one row per person and wave; the two-character suffix becomes the string year
reshape long `stublist', i(uid) j(year) string
destring year, replace
* two-digit suffix to four-digit year: above 90 is 19xx, below 20 is 20xx
replace year = 1900+year if year>90
replace year = 2000+year if year<20


*******************
* clean variables *
******************* 

* Drop person-wave rows whose interview number is 0, then rename the id.
drop if interviewnumber==0
rename interviewnumber hh_interviewnum 

* married = 1 when the marital pairs indicator is positive, 0 otherwise.
* The explicit guard keeps a missing indicator missing; without it the
* comparison missing>0 evaluates to true and would code the row as married.
gen married = maritalpairsindicator>0 if !mi(maritalpairsindicator)
replace employmentstatus = . if inlist(employmentstatus, 0, 8, 9) 
 
* Out-of-range birth months and years become missing before building the
* monthly birth date.
replace monthindividualborn = . if !inrange(monthindividualborn, 1, 12) 
replace yearindividualborn = . if !inrange(yearindividualborn, 1889, 2017) 
gen birthmoyr = ym(yearindividualborn, monthindividualborn) 
format birthmoyr %tm

* NOTE(review): the if qualifier restricts both the rows entering the maximum
* and the rows receiving it, so max_educ is missing on rows where years of
* education is 98 or above (or missing). Confirm this is intended before
* treating max_educ as constant within person.
bys uid: egen max_educ = max(yearscompletededucation) if yearscompletededucation<98

* State of birth: codes 99 and 0 become missing; the assert requires the
* remaining values to agree within person, and the per-person value replaces
* the wave-level variable.
replace fipsstatewhereborn = . if inlist(fipsstatewhereborn, 99, 0)
bys uid: egen max_state = max(fipsstatewhereborn)
bys uid: egen min_state = min(fipsstatewhereborn)
assert max_state==min_state
drop min_state fipsstatewhereborn
rename max_state fipsstatewhereborn

* Health status: codes 0, 8, 9 become missing and the per-person maximum is
* spread to every row. The value label is attached to the variable that is
* kept; previously it was attached to h1healthstatus, which is dropped here.
replace h1healthstatus = . if inlist(h1healthstatus, 0, 8, 9)
bys uid: egen h1healthstatus1996 = max(h1healthstatus)
label values h1healthstatus1996 ER33326L
drop h1healthstatus


// For each listed person-level variable, build hd_<var> and wf_<var>: the value
// taken from rows with reltohead 10 (hd) or reltohead 20 or 22 (wf), spread to
// every row of the same year and hh_interviewnum.
// NOTE(review): the if / else pair that picks func is the programming if, so its
// expression is evaluated once, using the value of the variable in the first
// observation at that point, not row by row.
// NOTE(review): sd is generated and dropped without being read; its only
// effect is the sort by uid performed by bys, which determines which
// observation is first when the if above is evaluated.
foreach var in h1healthstatus1996  birthmoyr fipsstatewhereborn max_educ {
    bys uid: egen sd = sd(`var') if reltohead==10
    if inlist(`var', 0, .) local func "max(temp)"
    else local func "mode(temp), minmode"
    drop sd
	
    // value on rows with reltohead 10 only, then spread within year and household
    gen temp = `var' if reltohead==10
    bys year hh_interviewnum: egen hd_`var' = `func'
    drop temp
    
    bys uid: egen sd = sd(`var') if inlist(reltohead, 20, 22)
    if inlist(`var', 0, .) local func "max(temp)"
    else local func "mode(temp), minmode"
    drop sd

    // same construction for rows with reltohead 20 or 22
    gen temp = `var' if inlist(reltohead, 20, 22)
    bys year hh_interviewnum: egen wf_`var' = `func'
    drop temp
    } 
    


* Birth order and sibling count among rows with reltohead 30, within year and
* household, ordered by birth year and then monthly birth date. Rows without a
* birth date get no birth order or sibling count.
gen child = 1 if reltohead==30
bysort year hh_interviewnum child (yearindividualborn birthmoyr): ///
    gen birthorder = _n if child==1 & !mi(birthmoyr)
by year hh_interviewnum child: gen numsib = _N if child==1 & !mi(birthmoyr)
* a single child in the group is coded birth order 0; numsib excludes the child
replace birthorder = 0 if birthorder==1 & numsib==1
replace numsib = numsib-1

* Widen the child flag to reltohead 30, 33, 35 and 38 and count the flagged
* rows per year and household, spreading the count to all rows of the household.
replace child = 1 if inlist(reltohead, 33, 35, 38)
bysort year hh_interviewnum child: gen temp = _N if child==1 & !mi(birthmoyr)
bysort year hh_interviewnum: egen numkids = max(temp)
drop temp

* Person-level copy of the household interview number observed in 1997.
gen temp = hh_interviewnum if year==1997 
bys uid: egen famid1997_fe = max(temp)
drop temp

* Modal monthly birth date per person (smallest value on ties), split into
* year, month and quarter with date functions. This replaces formatting the
* date as text and splitting the string, which produced the same values but
* failed when no observation yielded a month token.
bys uid: egen mode_birthmoyr = mode(birthmoyr), minmode
gen int birthyear = year(dofm(mode_birthmoyr))
gen byte birthmonth = month(dofm(mode_birthmoyr))
gen birthquarter = quarter(dofm(mode_birthmoyr))

* Retain identifiers, cleaned person variables and the hd_ / wf_ variables.
keep uid year ER30001 ER30002 hh_interviewnum reltohead married employmentstatus ///
    mode_birthmoyr birthmoyr birthyear birthmonth birthquarter birthorder numsib ///
    numkids max_educ yearscompletededucation fipsstatewhereborn h1healthstatus1996 ///
    famid1997_fe hd_* wf_*

* Prefix with pi_ every variable stored between reltohead and birthquarter.
* The range follows the order of variables in the dataset, not the order in
* the keep list above.
unab pi_vars : reltohead-birthquarter
foreach v of local pi_vars {
    rename `v' pi_`v'
}

sort ER30001 ER30002 year

save "`CLEAN'/cleaned_psid_indiv.dta", replace

********************************************************************************
// BRING IN FAMILY DATA IN EACH YEAR

* Counters used by the wave loop below: wave starts at 0 and downloadnum at
* 269708 (plus the starting wave); both are advanced once per wave.
local wave = 0
local downloadnum = 269708 + `wave'
forval year = 1997(2)2019 {

// The 2019 wave does not follow the running counter; its number is set here.
if `year'==2019 local downloadnum = 301737

// Run the do-file and then the formats do-file stored in this wave's folder.
do "`RAW'/psid_fam/fam_`year'/J`downloadnum'.do"
do "`RAW'/psid_fam/fam_`year'/J`downloadnum'_formats.do"

// ly is the calendar year before the wave year; it appears in one of the
// label fragments matched below.
local ly = `year'-1
// Collect every variable whose label contains one of the fragments below
// (some with extra exclusions), skipping labels that start with ACC, and
// keep only those variables.
local keeplist ""
foreach var of varlist _all {
    local label: variable label `var'
    if strpos("`label'", "INTERVIEW #") > 0 | ///
       strpos("`label'", "FAMILY INTERVIEW") > 0 | ///
       strpos("`label'", "CURRENT STATE") > 0 | ///
	   strpos("`label'", "CURRENT REGION") > 0 | ///
       strpos("`label'", "# IN FU") > 0 | ///
       strpos("`label'", "AGE OF ") == 1 | ///
       (strpos("`label'", "SEX OF") > 0 & strpos("`label'", "SPOUSE")==0) | ///
       strpos("`label'", "# CHILDREN") > 0 | ///
       strpos("`label'", "MARITAL STATUS") > 0 | ///
       strpos("`label'", "COUPLE STATUS") > 0 | ///
       strpos("`label'", "CKPT: WTR WIFE IN FU") > 0 | ///
       strpos("`label'", "CKPT: WTR WIFE/WIFE IN FU") > 0 | ///
       strpos("`label'", "CKPT: WTR WIFE/'WIFE' IN FU") > 0 | ///
       strpos("`label'", "CKPT: WTR SPOUSE IN FU") > 0 | ///
       strpos("`label'", "TYPE DU") > 0 | ///
       strpos("`label'", "OWN/RENT") > 0 | ///
       strpos("`label'", "HOUSE VALUE") > 0 | ///
       strpos("`label'", "DOLLARS RENT") > 0 | ///
       (strpos("`label'", "DOLLARS PER WHAT") > 0 & strpos("`label'", "A20G")==0) | ///
       strpos("`label'", "DOLLLARS PER WHAT") > 0 | ///
       strpos("`label'", "PUBLIC OWND") > 0 | ///
(strpos("`label'", "1ST MENTION") > 0 & strpos("`label'", "HEATING")==0 & strpos("`label'", "WHY MOVED")==0) | /// whether head/wife working
       strpos("`label'", "UNEMPLOYMENT WEEKS") > 0 | ///
       strpos("`label'", "HOUSEWORK") > 0 | ///
       strpos("`label'", "FAM TOGET") > 0 | ///
       strpos("`label'", "REC FOOD STAMPS") > 0 | ///
       strpos("`label'", "WTR RECEIVED FOOD STAMPS IN `ly'") > 0 | ///
       strpos("`label'", "WTR RECEIVED FOOD STAMPS LAST YEAR") > 0 | ///
       strpos("`label'", "WTR ITEMIZE FOR TAX") > 0 | ///
       strpos("`label'", "RELIGIOUS PREF") > 0 | ///
       strpos("`label'", "RACE") > 0 | ///
       strpos("`label'", "MILIT SERV") > 0 | ///
       strpos("`label'", "WTR EVER IN MILITARY") > 0 | ///
       strpos("`label'", "WTR ATTEND COLLEGE") > 0 | ///
       strpos("`label'", "WTR ATTENDED COLLEGE") > 0 | ///
       strpos("`label'", "WTR GRADUATED HS") > 0 | ///
       strpos("`label'", "WTR RECD COLL DEG") > 0 | ///
       strpos("`label'", "WTR RECD COLLEGE DEG") > 0 | ///
       strpos("`label'", "HGHST YR COLL COMP") > 0 | ///
       strpos("`label'", "HGHST COLL DEG REC") > 0 | ///
       strpos("`label'", "HGHST COLLEGE DEGREE REC") > 0 | ///
       strpos("`label'", "HIGHST COLLEGE DEGREE REC") > 0 | ///
       strpos("`label'", "TAXABLE INCOME") > 0 | ///
       strpos("`label'", "SOCIAL SECURITY INCOME") > 0 | ///
       strpos("`label'", "TRANSFER INCOME") > 0 | ///
       strpos("`label'", "TOTAL FAMILY INCOME") > 0 | ///
       (strpos("`label'", "LABOR INCOME-") > 0 & strpos("`label'", "OTHER")==0 & strpos("`label'", "WHETHER")==0)| ///
       (strpos("`label'", "LABOR INCOME OF HEAD-") > 0 & strpos("`label'", "MISC")==0)| ///
       (strpos("`label'", "LABOR INCOME OF REF PERSON-") > 0 & strpos("`label'", "MISC")==0)| ///
       (strpos("`label'", "LABOR INCOME OF SPOUSE-") > 0 & strpos("`label'", "MISC")==0) | ///
       (strpos("`label'", "LABOR INCOME OF WIFE-") > 0 & strpos("`label'", "MISC")==0) | ///
       (strpos("`label'", "LABOR INCOME OF WIFE LAST YEAR") > 0 & strpos("`label'", "MISC")==0) | ///
       (strpos("`label'", "LABOR INCOME OF HEAD LAST YEAR") > 0 & strpos("`label'", "MISC")==0) | ///
       strpos("`label'", "MAIN OCC FOR JOB 1") > 0 | ///
       strpos("`label'", "MAIN IND FOR JOB 1") > 0 | ///
       strpos("`label'", "TOTAL HOURS OF WORK") > 0 | ///
       strpos("`label'", "WORK WEEKS") > 0 | ///
       strpos("`label'", "MAIN OCCUPATION") > 0 | ///
       strpos("`label'", "OCCUPATION-LAST JOB")>0 | ///
       strpos("`label'", "RURAL-URBAN CODE (BEALE-COLLAPSED)") > 0  {
           if strpos("`label'", "ACC")!=1 local keeplist "`keeplist' `var'"
       }
    }
     
    // everything not collected above is dropped from this wave
    keep `keeplist'
   
    // Rename every kept variable to <prefix>_<name>, where both parts are built
    // from the variable label. The prefix is hh, hd or wf (with _e and _u
    // variants in the 1997-2001 waves). last holds the previous variable's new
    // name and is used when naming dollars-per-what and public-project variables.
    local last = ""
     
    foreach var of varlist _all {
        local label: variable label `var'
	// remove checkpoint prefixes and shorten the interview-number label
	local label = subinstr("`label'", "DE1 CKPT: ", "", .)
	local label = subinstr("`label'", "D1 CKPT: ", "", .)
	local label = subinstr("`label'", "FAMILY INTERVIEW (ID) NUMBER", "INTERVIEW NUM", .)
	// 1997-2001: a single leading letter followed by a numeric second
	// character selects the prefix; anything else is hh
	if inrange(`year', 1997, 2001) {
	    is_kth_char_numeric, str("`label'") k(2)
	
	    if strpos("`label'", "A")==1 & `r(num)'==1 local prefix "hh"
	    else if strpos("`label'", "B")==1 & `r(num)'==1 local prefix "hd_e"
	    else if strpos("`label'", "C")==1 & `r(num)'==1 local prefix "hd_u"
	    else if strpos("`label'", "D")==1 & `r(num)'==1 local prefix "wf_e"
	    else if strpos("`label'", "E")==1 & `r(num)'==1 local prefix "wf_u"
	    else if strpos("`label'", "K")==1 & `r(num)'==1 local prefix "wf"
	    else if strpos("`label'", "L")==1 & `r(num)'==1 local prefix "hd"
	    else local prefix "hh"
	    
	    }
	// other waves: leading BC or L (followed by a numeric character) gives hd,
	// leading DE or K gives wf, leading A and everything else gives hh
	else {
	    is_kth_char_numeric, str("`label'") k(2)
	    local r2 = `r(num)'	    
	    is_kth_char_numeric, str("`label'") k(3)
	    local r3 = `r(num)'
	    
	    if strpos("`label'", "A")==1 & `r2'==1 local prefix "hh"
	    else if (strpos("`label'", "BC")==1 & `r3'==1) | (strpos("`label'", "L")==1 ///
		& `r2'==1) local prefix "hd"	    
	    else if (strpos("`label'", "DE")==1 & `r3'==1) | (strpos("`label'", "K")==1 ///
		& `r2'==1) local prefix "wf"
	    else local prefix "hh"
	
	}
	    // split = 1 means the first space-delimited token of the name is
	    // dropped further below
	    is_kth_char_numeric, str("`label'") k(2)
	    if "`prefix'" != "hh" local split = 1
	    else if "`prefix'"=="hh" & `r(num)'==1 & strpos("`label'", "INTERVIEW")==0 local split = 1
	    else local split = 0
	
	// strip respondent and section markers from the label text
	local newname = subinstr("`label'", "H-E", "", .)
	local newname = subinstr("`newname'", "HD-E", "", .)
	local newname = subinstr("`newname'", "H-U", "", .)
	local newname = subinstr("`newname'", "HD-U", "", .)
	local newname = subinstr("`newname'", "W-E", "", .)
	local newname = subinstr("`newname'", "WF-E", "", .)
	local newname = subinstr("`newname'", "W-U", "", .)
	local newname = subinstr("`newname'", "WF-U", "", .)
	local newname = subinstr("`newname'", "BEALE-COLLAPSED", "", .)
	local newname = subinstr("`newname'", "(HD)", "", .)
	local newname = subinstr("`newname'", "(RP)", "", .)
	local newname = subinstr("`newname'", "(WF)", "", .)
	local newname = subinstr("`newname'", "(SP)", "", .)
	local newname = subinstr("`newname'", "()", "", .)
	
	// map reference-person wording to HEAD and spouse wording to WIFE
	local newname = subinstr("`newname'", "REFERENCE PERSON", "HEAD", .)
	local newname = subinstr("`newname'", "REF PERSON", "HEAD", .)
	local newname = subinstr("`newname'", "REF PERSN", "HEAD", .)
	local newname = subinstr("`newname'", "SPOUSE", "WIFE", .)
	

	// respondent suffixes are removed when the prefix already identifies the
	// respondent, and spelled out as HEAD or WIFE when the prefix is hh
	if "`prefix'"!="hh" {
	    local newname = subinstr("`newname'", "-WF", "", .)	    
	    local newname = subinstr("`newname'", "-W", "", .)
	    local newname = subinstr("`newname'", "-HD", "", .) 
	    local newname = subinstr("`newname'", "-H", "", .)
	}
	if "`prefix'"=="hh" {
	    local newname = subinstr("`newname'", "-WF", "-WIFE", .)
	    if strpos("`newname'", "WIFE")==0 local newname = subinstr("`newname'", "-W", "-WIFE", .) 
	    local newname = subinstr("`newname'", "-HD", "-HEAD", .) 
	    if strpos("`newname'", "HEAD")==0 local newname = subinstr("`newname'", "-H", "-HEAD", .) 
	    local newname = subinstr("`newname'", "OF HEAD", "-HEAD", .)
	    local newname = subinstr("`newname'", "OF WIFE", "-WIFE", .)
	}
	else {
	    local newname = subinstr("`newname'", "OF HEAD", "", .)
	    local newname = subinstr("`newname'", "OF WIFE", "", .)
	}
	// punctuation, spelling variants and wording differences between waves
	local newname = subinstr("`newname'", "-MENTION", "", .)
	local newname = subinstr("`newname'", "+", "PLUS", .)
	local newname = subinstr("`newname'", "-", "_", .)
	local newname = subinstr("`newname'", "#", "NUM", .)
	local newname = subinstr("`newname'", "/WK", "PERWK", .)
	local newname = subinstr("`newname'", "WIFE/WIFE", "WIFE", .)
	local newname = subinstr("`newname'", "WIFE/'WIFE'", "WIFE", .)
	local newname = subinstr("`newname'", "/", "", .)
	local newname = subinstr("`newname'", "?", "", .)
	local newname = subinstr("`newname'", ":", "", .)
	local newname = subinstr("`newname'", "DOLLLARS", "DOLLARS", .)
	local newname = subinstr("`newname'", "PREFF", "PREF", .)
	if strpos("`newname'", "EMPLOYMENT STATUS") == 0 local newname = subinstr("`newname'", "1ST MENTION", "EMPLOYMENT STATUS", .)
	else local newname = subinstr("`newname'", "_1ST MENTION", "", .)
	local newname = subinstr("`newname'", "WK ", "WEEK", .)
	local newname = subinstr("`newname'", "WRK ", "WORK", .)
	local newname = subinstr("`newname'", "CODE", "", .)
	local newname = subinstr("`newname'", "TYPE DU", "TYPE DWELLING", .)
	local newname = subinstr("`newname'", "WTR RECEIVED FOOD STAMPS", "REC FOOD STAMPS", .)
	local newname = subinstr("`newname'", "WTR IN MILIT SERV", "WTR EVER IN MILITARY", .)
	local newname = subinstr("`newname'", "_SP", "_WIFE", .)
	local newname = subinstr("`newname'", "_RP", "_HEAD", .)
	
	if "`newname'"=="COUPLE STATUS _HEAD" local newname "HEADCOUPLESTATUS"
	if "`newname'"=="HEAD MARITAL STATUS" local newname "HEADMARITALSTATUS"
	local newname = subinstr("`newname'", "HEAD AND TAXABLE INCOME WIFE", "HEAD AND WIFE TAXABLE INCOME", .)
	local newname = subinstr("`newname'", "HEAD AND WIFE", "HDPLUSWF", .)
	local newname = subinstr("`newname'", "RP AND WIFE", "HDPLUSWF", .)
	
	if inlist("`prefix'", "wf", "hd") {
	    local newname = subinstr("`newname'", "_WIFE", "", .)
	    local newname = subinstr("`newname'", "_HEAD", "", .)
	}

	// when split is 1 the name starts after the first blank, which removes
	// the leading token of the label
	if `split'==1 {
	    local newstart = strpos("`newname'", " ")+1
	}
	else local newstart = 1
	local newname = substr("`newname'", `newstart', .)
	
	
	// hh names that still mention HEAD or WIFE are moved to the hd or wf
	// prefix and the mention is removed
	if "`prefix'"=="hh" & strpos("`newname'", "HEAD ")==1 {
	    local newname = subinstr("`newname'", "HEAD ", "", .)
	    local prefix "hd"
	}
	if "`prefix'"=="hh" & strpos("`newname'", "WIFE")==1 {
	    local newname = subinstr("`newname'", "WIFE ", "", .)
	    local prefix "wf"
	}
		
	if "`prefix'"=="hh" & strpos("`newname'", "_HEAD")>0 {
	    local newname = subinstr("`newname'", "_HEAD ", "", .)
	    local newname = subinstr("`newname'", "_HEAD", "", .)
	    local prefix "hd"
	}
	if "`prefix'"=="hh" & strpos("`newname'", "_WIFE")>0 {
	    local newname = subinstr("`newname'", "_WIFE ", "", .)
	    local newname = subinstr("`newname'", "_WIFE", "", .)
	    local prefix "wf"
	}
	
	
	// remove blanks and lower-case the result
	local newname = subinstr("`newname'", " ", "", .)
	local newname = lower("`newname'")
	
	// a dollars-per-what variable is named after the previously renamed
	// variable (last) with perwhat appended, so it depends on variable order
	if strpos("`newname'", "dollarsperwhat")>0 {
	    local newname = subinstr("`last'", "hh_", "", 1)+"perwhat"
	}
	
	local newname = subinstr("`newname'", "ofotherfumembers", "ofum", .)
	local newname = subinstr("`newname'", "ofotherfumember", "ofum", .)
	local newname = subinstr("`newname'", "ofofums", "ofum", .)
	local newname = subinstr("`newname'", "totalfamilysocialsecurityincome", "socialsecurityincome", .)
	local newname = subinstr("`newname'", "houseworkhrs", "houseworkhours", .)
	if strpos("`newname'", "interviewnum")>0 local newname "interviewnum`year'"
	if strpos("`newname'", "religiouspreference")>0 local newname "religiouspref"
	// the public-project variable is named by whether it directly follows
	// the rent per-what variable
	if strpos("`newname'", "inpublicowndproj")>0 & "`last'"=="hh_dollarsrentperwhat" local newname "rentspublicowndproj"
	if strpos("`newname'", "inpublicowndproj")>0 & "`last'"!="hh_dollarsrentperwhat" local newname "liveinpublicowndproj"
	
	// references to the prior calendar year (ly) and to last year are
	// normalised to pryr
	local newname  = subinstr("`newname'", "in`ly'", "_pryr", .)
	local newname  = subinstr("`newname'", "`ly'", "pryr", .)
	local newname  = subinstr("`newname'", "lastyear", "_pryr", .)
	local newname  = subinstr("`newname'", "pyr", "_pryr", .)
	
	local newname = subinstr("`newname'", "interviewnum`year'", "interviewnum", .)
	local newname = subinstr("`newname'", "workweeks", "totalweeksofwork", .)
	local newname = subinstr("`newname'", "completed", "comp", .)
	local newname = subinstr("`newname'", "attendcollege", "attendedcollege", .)
	local newname = subinstr("`newname'", "wtrrecdcolldeg", "wtrrecdcollegedegree", .)
	local newname = subinstr("`newname'", "hghstcolldegrec", "hghstcollegedegreerecd", .)
	local newname = subinstr("`newname'", "highstcollegedegreerecd", "hghstcollegedegreerecd", .)
	local newname = subinstr("`newname'", "income", "inc", .)

	
	// drop a trailing hd or wf that repeats the prefix
	foreach pf in hd wf {
	    if "`prefix'"=="`pf'" & substr("`newname'", -2, .)=="`pf'" {
	        local len = length("`newname'")
		local newlen = `len'-2
		local newname = substr("`newname'", 1, `newlen')
	    }
	}
	
	// employment status is not split into _e and _u variants
	if strpos("`newname'", "employmentstatus")==1 {
	    local prefix = subinstr("`prefix'", "_e", "", .)
	}
	
	** year-specific cleaning
	if inlist(`year', 1997, 1999) {
	    foreach incomevar in laborinc totalfamilyinc taxableinc hdpluswftransferinc socialsecurityinc ofumtransferinc {
	        local newname = subinstr("`newname'", "`incomevar'", "`incomevar'_pryr", .)
	    }
	}
	
	if `year'==2003 {
	foreach incomevar in taxableincofum transferincofum {
	        local newname = subinstr("`newname'", "`incomevar'", "`incomevar'_pryr", .)
	    }
	}
	local newname = subinstr("`newname'", "wtritemizefortax", "wtritemizefortax_pryr", .)


	local newname = subinstr("`newname'", "taxableincofum", "ofumtaxableinc", .)
	local newname = subinstr("`newname'", "transferincofum", "ofumtransferinc", .)
	local newname = subinstr("`newname'", "totalofumtransferinc", "ofumtransferinc", .)
	local newname = subinstr("`newname'", "mainoccforjob1", "occupationcode", .)
	local newname = subinstr("`newname'", "mainindforjob1", "industrycode", .)
	
	local newname = subinstr("`newname'", "mainoccupation3digit", "occupationcode1970", .)	
	local newname = subinstr("`newname'", "occupation_lastjob", "occupationcode1970", .)	

	di "var: `var', `newname': prefix `prefix', `split', `newstart'"
	
	// apply the new name. The label becomes the wave counter followed by the
	// original name without its leading ER; the same text is also stored in a
	// local keyed by the first 25 characters of the new name and the 2-digit
	// year, which is read again when labels are combined after appending.
	rename `var' `prefix'_`newname'
	local yr = substr("`year'", 3, 2)
	local shortvar = subinstr("`var'", "ER", "", 1)
	label var `prefix'_`newname' "`wave'`shortvar'"
	local shortname = substr("`prefix'_`newname'", 1, 25)
        local l_`shortname'_`yr' "`wave'`shortvar'" 
	local last "`prefix'_`newname'"

	
    }  
     
    // Stamp the wave year on every row and park the wave in its own tempfile.
    gen year = `year'

    tempfile psid`year'
    save `psid`year''

    // Advance the counters for the next wave.
    local ++wave
    local ++downloadnum
}
 
* Stack the waves, starting from 1997. After each append, extend every
* variable label with that wave's stored source tag unless the label already
* contains it. Tags are looked up by the first 25 characters of the variable
* name and the 2-digit wave year.
use `psid1997', clear

forval year = 1999(2)2019 {
    append using `psid`year''
    local yy = substr("`year'", 3, 2)
    foreach v of varlist _all {
        local curlab : variable label `v'
        local key = substr("`v'", 1, 25)
        local tag "`l_`key'_`yy''"
        if strpos("`curlab'", "`tag'")==0 label var `v' "`curlab' `tag'"
    }
}
order year hh_* hd_* wf_* 

 
* basic variable cleaning
* Where the household social security total is missing, fill it with the sum
* of the head, wife and other-member components, then drop the components.
replace hh_socialsecurityinc_pryr = hd_socialsecurityinc_pryr ///
    + wf_socialsecurityinc_pryr + hh_ofumsocialsecurityinc_pryr ///
    if mi(hh_socialsecurityinc_pryr)
drop hd_socialsecurityinc_pryr wf_socialsecurityinc_pryr hh_ofumsocialsecurityinc_pryr 

* Collapse the wife-in-FU checkpoint to a 0/1 indicator. From 2017 on, codes 2
* and 4 become 0 and code 3 becomes 1; before 2017, codes 2 and 3 become 0.
* Other values are left unchanged.
recode hh_wtrwifeinfu (2 4 = 0) (3 = 1) if year>=2017
recode hh_wtrwifeinfu (2 3 = 0) if year<2017
label define wtrwifeinfuL 1 "'Wife' interview occurs" 0 "No 'wife' interview occurs"
label values hh_wtrwifeinfu wtrwifeinfuL
rename hh_wtrwifeinfu hh_haswifeinterview

* Flag a graduated-HS answer of 0 before code 0 is turned into missing below.
foreach who in hd wf {
    gen `who'_educoutsideUSornoeduc = `who'_wtrgraduatedhs==0
}

* Codes 0, 8, 9 become missing.
mvdecode hh_headmaritalstatus hh_maritalstatus_generated hh_typedwelling ///
    hh_dollarsrentperwhat hh_famtogethrmainmeal hd_wtrgraduatedhs ///
    hd_hghstyrcollcomp wf_wtrgraduatedhs wf_hghstyrcollcomp, mv(0 8 9)

* Code 9 becomes missing.
mvdecode hh_ownrentorwhat hh_currentregion, mv(9)

* Employment status and race: anything outside 1-8 becomes missing.
foreach v of varlist hd_employmentstatus wf_employmentstatus hd_race* wf_race* {
    replace `v' = . if !inrange(`v', 1, 8)
}

* Codes 0, 98, 99 become missing.
mvdecode hh_currentstate hh_rural_urban hd_hghstcollegedegreerecd ///
    wf_hghstcollegedegreerecd, mv(0 98 99)
rename hh_currentstate hh_fipscurrentstate

* Religious preference: 98 and 99 become missing; the wife value is also
* blanked when there is no wife interview.
mvdecode hd_religiouspref wf_religiouspref, mv(98 99)
replace wf_religiouspref = . if hh_haswifeinterview==0
label values wf_religiouspref ER11895L

* Age and housework hours: 998 and 999 become missing.
mvdecode hd_age wf_age hd_houseworkhours wf_houseworkhours, mv(998 999)

* Occupation and industry codes: 0 and the all-nines code become missing.
mvdecode hd_occupationcode2000 hd_industrycode2000 wf_occupationcode2000 ///
    wf_industrycode2000 *_occupationcode1970, mv(0 999)
mvdecode hd_occupationcode2010 hd_industrycode2012 wf_occupationcode2010 ///
    wf_industrycode2012, mv(0 9999)

// Fill the e occupation code from the u code where the e code is missing. The u
// variable covers only the most recent main job, while the question asked from
// 2003 on covers the current or most recent main job, so the combined variable
// is comparable with the later waves.
foreach who in hd wf {
    replace `who'_e_occupationcode1970 = `who'_u_occupationcode1970 if mi(`who'_e_occupationcode1970)
    rename `who'_e_occupationcode1970 `who'_occupationcode1970
    drop `who'_u_occupationcode1970
}

* Rent-if-rent: 0, 9998, 9999 become missing, and the value is kept only
* where own/rent status equals 8.
mvdecode hh_dollarsrentifrent, mv(0 9998 9999)
replace hh_dollarsrentifrent = . if hh_ownrentorwhat!=8

* Rent and house value: 0 and the two top codes become missing.
mvdecode hh_dollarsrent, mv(0 99998 99999)
mvdecode hh_housevalue, mv(0 9999998 9999999)

* cleaning existing indicators
* Recode existing yes/no style variables to 1/0: codes 0, 8, 9 become missing,
* codes 2 and 5 become 0, and the Yes/No value label is attached.
label define indicator 1 "Yes" 0 "No"
local yesno_vars hh_rentspublicowndproj hh_liveinpublicowndproj hh_wtritemizefortax ///
	hd_sex hd_wtreverinmilitary wf_wtreverinmilitary hd_wtrattendedcollege ///
	hd_wtrrecdcollegedegree wf_wtrattendedcollege wf_wtrrecdcollegedegree ///
	hh_recfoodstamps_pryr
recode `yesno_vars' (0 8 9 = .) (2 5 = 0)
label values `yesno_vars' indicator
* after the recode hd_sex is 1 for code 1 and 0 for code 2
rename hd_sex hd_male

* extended cleaning to make useful vars:

* make indicators out of categories/cts vars
* Indicators built from amounts and weeks. In Stata a missing value compares
* as larger than any number, so an unguarded x>0 codes missing x as 1. Each
* indicator below is 1 only for a non-missing positive value and is left
* missing when the inputs do not determine it, in line with the main-meal
* indicator at the end of this block.

* any transfer income: 1 if either source is positive; missing if neither is
* positive and at least one source is missing
gen hh_rectransfers_pryr = (hh_hdpluswftransferinc_pryr>0 & !mi(hh_hdpluswftransferinc_pryr)) | ///
	(hh_ofumtransferinc_pryr>0 & !mi(hh_ofumtransferinc_pryr))
replace hh_rectransfers_pryr = . if hh_rectransfers_pryr==0 & ///
	(mi(hh_hdpluswftransferinc_pryr) | mi(hh_ofumtransferinc_pryr))

gen hh_recssi_pryr = hh_socialsecurityinc_pryr>0 if !mi(hh_socialsecurityinc_pryr)
gen hd_unemployed_pryr = hd_unemploymentweeks_pryr>0 if !mi(hd_unemploymentweeks_pryr)
gen wf_unemployed_pryr = wf_unemploymentweeks_pryr>0 if !mi(wf_unemploymentweeks_pryr)

* public housing: 1 if either public-project flag is 1; otherwise 0 when a
* flag is 0 or own/rent status is 1; missing in all other cases
gen hh_publichousing = 1 if hh_rentspublicowndproj==1 | hh_liveinpublicowndproj==1
replace hh_publichousing = 0 if mi(hh_publichousing) & ///
	(hh_rentspublicowndproj==0 | hh_liveinpublicowndproj==0 | hh_ownrentorwhat ==1)
gen hh_mainmealtogethgeq6days = hh_famtogethrmainmeal>=6 if !mi(hh_famtogethrmainmeal)

* clean income vars and put all dollars into 2016 dollars
* Shorten hdpluswf to hdwf in variable names, attach the CPI of the prior
* year to every row (all rows must match), and restate every variable whose
* name contains inc in 2016 dollars. The original variable is replaced by a
* copy with the suffix 2016d.
rename (*hdpluswf*) (*hdwf*)
gen pryr = year - 1
merge m:1 pryr using `cpi', assert(3) nogen

* CPI for prior year 2016, the base of the conversion
summarize cpi if pryr==2016, meanonly
local cpi2016 = `r(mean)'
foreach amount of varlist *inc* {
    * first into 1982-1984 dollars, then up to 2016 dollars
    gen `amount'2016d = `amount'/(cpi/100)
    replace `amount'2016d = `amount'2016d*(`cpi2016'/100)
    drop `amount'
}
  
* clean education vars
foreach pref in hd wf {

    // At least a high-school diploma: 1 if graduated HS, attended college or
    // received a college degree; set to 0 when the graduated-HS answer is 2 or 3.
    gen `pref'_atleast_grad_hs = 1 if `pref'_wtrgraduatedhs==1 | `pref'_wtrattendedcollege==1 ///
	| `pref'_wtrrecdcollegedegree==1
    replace `pref'_atleast_grad_hs = 0 if inlist(`pref'_wtrgraduatedhs, 2, 3)
    
    // Same indicator, but a graduated-HS answer of 2 counts as 1
    // (presumably the GED code, going by the variable name; verify in the codebook).
    gen `pref'_atleast_grad_hs_ged = `pref'_atleast_grad_hs
    replace `pref'_atleast_grad_hs_ged = 1 if `pref'_wtrgraduatedhs==2
    
    // At least a bachelor degree: 1 if a college degree was received and the
    // highest degree code is 2-6; 0 if no degree, no HS (including GED), no
    // college attendance, or highest degree code 1. Degree code 97 is decided
    // by the highest year of college completed (4 or 5 gives 1, 1-3 gives 0).
    gen `pref'_atleast_grad_bach = 1 if `pref'_wtrrecdcollegedegree==1 & inrange(`pref'_hghstcollegedegreerecd, 2, 6)
    replace `pref'_atleast_grad_bach = 0 if `pref'_wtrrecdcollegedegree ==0 | `pref'_wtrgraduatedhs==3 | `pref'_atleast_grad_hs_ged==0 | `pref'_wtrattendedcollege ==0 | `pref'_hghstcollegedegreerecd==1
    replace `pref'_atleast_grad_bach = 1 if `pref'_hghstcollegedegreerecd==97 & inlist(`pref'_hghstyrcollcomp, 4, 5)
    replace `pref'_atleast_grad_bach = 0 if `pref'_hghstcollegedegreerecd==97 & inlist(`pref'_hghstyrcollcomp, 1, 2, 3)
    // Attended college but years completed or highest degree is missing: 0.
    // The degree variable now belongs to the respondent being processed; it
    // previously always read the head's variable, also in the wife pass.
    replace `pref'_atleast_grad_bach = 0 if `pref'_wtrattendedcollege==1 & (mi(`pref'_hghstyrcollcomp) | mi(`pref'_hghstcollegedegreerecd)) & `pref'_educoutsideUSornoeduc==0
    
    // More than a bachelor degree: 1 for degree codes 3-6; 0 for codes 1, 2,
    // 97 or when the bachelor indicator is 0.
    gen `pref'_atleast_grad_postbach = 1 if inrange(`pref'_hghstcollegedegreerecd, 3, 6)
    replace `pref'_atleast_grad_postbach = 0 if inlist(`pref'_hghstcollegedegreerecd, 1, 2, 97) | `pref'_atleast_grad_bach==0
    
}

* marital status - clean after merging since there is just a marital status of head variable

* Put every wife and head variable under the hh_ prefix as well, so all
* variables in the household file start with hh_.
unab respondent_vars : wf_* hd_*
foreach v of local respondent_vars {
    rename `v' hh_`v'
}

* Order rows by wave and household, then write the cleaned household file.
sort year hh_interviewnum

save "`CLEAN'/cleaned_psid_hh.dta", replace

